<?php

// Abort with a "no permission" message unless the IN_HOUSE5 constant has been defined
// before this file is loaded.
defined('IN_HOUSE5') or exit('No permission resources.');
/**
 * Static helpers that fetch remote pages and extract URL lists and article
 * fields from them according to a rule configuration array.
 */
class collection {
// URL and rule configuration of the article being processed; written by get_content()
// and read by download_img() when it resolves relative image addresses.
protected static $url,$config;
/**
 * Fetch one article page and extract its fields according to the collection rules.
 *
 * On the top-level call ($page == 0) the title, author, source ("comeform"),
 * publish time and any custom fields are extracted; the content field is
 * extracted on every page. When pagination markers are configured, the other
 * pages are fetched recursively and their content is merged into
 * $data['content'], joined with "[page]" when content_page == 1.
 *
 * @param string $url    Address of the page to fetch.
 * @param array  $config Rule set. Keys read here: title/author/comeform/time/content
 *                       "_rule" and "_html_rule", customize_config, content_page_start,
 *                       content_page_end, content_page_rule, content_nextpage,
 *                       content_page, down_attachment and watermark.
 * @param int    $page   0 = top-level call; 1 = page reached from a full page list
 *                       (pagination is not followed further); 2 = page reached through
 *                       a "next page" link (pagination keeps being followed).
 * @return array|null Extracted fields, or null when the page could not be fetched.
 */
public static function get_content($url,$config,$page = 0) {
    set_time_limit(300);
    // URLs already followed through "next page" links. Static, so it is shared by the
    // recursive calls and prevents pagination loops.
    static $oldurl = array();
    $page = intval($page);

    $html = self::get_html($url,$config);
    if (!$html) {
        // Nothing fetched: keep the historical falsy result, but make it explicit.
        return null;
    }

    $data = array();

    if ($page === 0) {
        // Plain text fields that share the same "<name>_rule" / "<name>_html_rule" layout.
        foreach (array('title','author','comeform') as $field) {
            if (empty($config[$field.'_rule'])) continue;
            $data[$field] = self::extract_field(
                $html,
                $config[$field.'_rule'],
                isset($config[$field.'_html_rule']) ? $config[$field.'_html_rule'] : ''
            );
        }
        if (!empty($config['time_rule'])) {
            $data['time'] = strtotime((string) self::extract_field(
                $html,
                $config['time_rule'],
                isset($config['time_html_rule']) ? $config['time_html_rule'] : ''
            ));
        }
        // Fall back to the current time when no time rule is set or the date is unparsable.
        if (empty($data['time'])) $data['time'] = SYS_TIME;

        if (isset($config['customize_config'])) {
            $config['customize_config'] = string2array($config['customize_config']);
            if (is_array($config['customize_config'])) {
                foreach ($config['customize_config'] as $custom) {
                    if (empty($custom['rule'])) continue;
                    $data[$custom['en_name']] = self::extract_field(
                        $html,
                        $custom['rule'],
                        isset($custom['html_rule']) ? $custom['html_rule'] : ''
                    );
                }
            }
        }
    }

    if (!empty($config['content_rule'])) {
        $data['content'] = self::extract_field(
            $html,
            $config['content_rule'],
            isset($config['content_html_rule']) ? $config['content_html_rule'] : ''
        );
    }

    // Pagination is followed from the first page and from "next page" hops only.
    if (in_array($page,array(0,2)) && !empty($config['content_page_start']) && !empty($config['content_page_end'])) {
        $oldurl[] = $url;
        $tmp = array(isset($data['content']) ? $data['content'] : null);
        $page_html = self::cut_html($html,$config['content_page_start'],$config['content_page_end']);
        $page_rule = isset($config['content_page_rule']) ? $config['content_page_rule'] : 0;

        // Mode 2: only a "next page" link is present; follow it hop by hop.
        if ($page_rule == 2 && $page_html) {
            $next_label = isset($config['content_nextpage']) ? (string) $config['content_nextpage'] : '';
            preg_match_all('/<a[^>]*href=[\'"]?([^>\'" ]*)[\'"]?[^>]*>([^<\/]*)<\/a>/i',$page_html,$out);
            if (!empty($out[1]) && !empty($out[2])) {
                foreach ($out[2] as $k=>$label) {
                    // An empty label can never identify the "next" link (and strpos() with
                    // an empty needle matches everything on PHP 8), so skip in that case.
                    if ($next_label === '' || strpos($label,$next_label) === false) continue;
                    if ($out[1][$k] === '' || $out[1][$k] == '#') continue;
                    $next_url = self::url_check($out[1][$k],$url,$config);
                    if (in_array($next_url,$oldurl)) continue;
                    $oldurl[] = $next_url;
                    $results = self::get_content($next_url,$config,2);
                    if (!is_array($results)) continue; // page could not be fetched
                    $page_content = isset($results['content']) ? $results['content'] : null;
                    if (!in_array($page_content,$tmp)) $tmp[] = $page_content;
                }
            }
        }

        // Mode 1: the first page lists every page; fetch each linked page once.
        if ($page_rule == 1 && $page === 0 && $page_html) {
            preg_match_all('/<a[^>]*href=[\'"]?([^>\'" ]*)[\'"]?/i',$page_html,$out);
            if (!empty($out[1]) && is_array($out[1])) {
                foreach (array_unique($out[1]) as $link) {
                    // Test the link itself; placeholder anchors carry no page.
                    if ($link === '' || $link == '#') continue;
                    $link = self::url_check($link,$url,$config);
                    $results = self::get_content($link,$config,1);
                    if (!is_array($results)) continue; // page could not be fetched
                    $page_content = isset($results['content']) ? $results['content'] : null;
                    if (!in_array($page_content,$tmp)) $tmp[] = $page_content;
                }
            }
        }

        $joiner = (isset($config['content_page']) && $config['content_page'] == 1) ? '[page]' : '';
        $data['content'] = implode($joiner,$tmp);
    }

    if ($page === 0) {
        // download_img() reads these to resolve relative image addresses.
        self::$url = $url;
        self::$config = $config;
        $content = isset($data['content']) ? (string) $data['content'] : '';
        // A callback is used instead of the "e" modifier: "e" was removed in PHP 7 and
        // evaluated text taken from the remote page as PHP code.
        $rewritten = preg_replace_callback(
            '/<img[^>]*src=[\'"]?([^>\'"\s]*)[\'"]?[^>]*>/i',
            array(__CLASS__,'rewrite_img_match'),
            $content
        );
        // preg_replace_callback() yields null on a PCRE error; keep the content then.
        $data['content'] = $rewritten === null ? $content : $rewritten;

        if (!empty($data['content']) && isset($config['down_attachment']) && $config['down_attachment'] == 1) {
            h5_base::load_sys_class('attachment','',0);
            $attachment = new attachment('collection','0',get_siteid());
            $data['content'] = $attachment->download('content',$data['content'],isset($config['watermark']) ? $config['watermark'] : '');
        }
    }
    return $data;
}

/**
 * Cut one field out of $html using a "start[content]end" rule, then apply its clean-up rules.
 */
private static function extract_field($html,$rule,$html_rule) {
    $markers = self::replace_sg($rule);
    $start = isset($markers[0]) ? $markers[0] : '';
    $end = isset($markers[1]) ? $markers[1] : '';
    return self::replace_item(self::cut_html($html,$start,$end),$html_rule);
}

/**
 * preg_replace_callback() adapter: hands the whole <img> tag and its src to download_img().
 */
private static function rewrite_img_match($match) {
    return self::download_img($match[0],isset($match[1]) ? $match[1] : '');
}
/**
 * Rewrite the image address inside one <img> tag to an absolute URL.
 *
 * @param string $old The complete tag text.
 * @param string $out The address found in the tag's src attribute.
 * @return string The tag with $out resolved through url_check() against the stored
 *                page URL and config, or the tag unchanged when either argument is
 *                empty or $out already contains "://".
 */
protected static function download_img($old,$out) {
    // Nothing to rewrite: missing input, or the address already carries a scheme.
    if (empty($old) || empty($out) || strpos($out,'://') !== false) {
        return $old;
    }
    $absolute = self::url_check($out,self::$url,self::$config);
    return str_replace($out,$absolute,$old);
}
/**
 * Build the list of index-page URLs for a collection node.
 *
 * sourcetype 1: expand the "(*)" placeholder in urlpage for every page number from
 *               pagesize_start up to $num (or pagesize_end when $num is empty),
 *               stepping by par_num; the result is keyed by page number.
 * sourcetype 2: urlpage holds one URL per line.
 * sourcetype 3 and 4: urlpage is a single URL.
 * Any other sourcetype yields an empty list.
 *
 * @param array      $config Node configuration (taken by reference, not modified here).
 * @param int|string $num    Optional last page number for sourcetype 1.
 * @return array List of URLs.
 */
public static function url_list(&$config,$num = '') {
    $url = array();
    switch ($config['sourcetype']) {
        case '1':
            $first = intval($config['pagesize_start']);
            $last = intval(empty($num) ? $config['pagesize_end'] : $num);
            // A zero, negative or missing step would never reach $last and loop forever,
            // so fall back to visiting every page.
            $step = isset($config['par_num']) ? intval($config['par_num']) : 0;
            if ($step < 1) {
                $step = 1;
            }
            for ($i = $first; $i <= $last; $i += $step) {
                $url[$i] = str_replace('(*)',(string) $i,$config['urlpage']);
            }
            break;
        case '2':
            // Accept CRLF, LF or CR line endings and ignore blank lines.
            foreach (preg_split('/\r\n|\r|\n/',(string) $config['urlpage']) as $line) {
                $line = trim($line);
                if ($line !== '') {
                    $url[] = $line;
                }
            }
            break;
        case '3':
        case '4':
            $url[] = $config['urlpage'];
            break;
    }
    return $url;
}
/**
 * Fetch an index page (or RSS feed) and return the article links found in it.
 *
 * For sourcetype 4 the response is parsed as XML and the rss/channel/item entries
 * are used. Otherwise the region between url_start and url_end is cut out and every
 * anchor in it is collected, filtered by url_contain / url_except, and its href is
 * made absolute with url_check().
 *
 * @param string $url    Address of the index page or feed.
 * @param array  $config Node configuration (taken by reference).
 * @return array|false Entries of the form array('url' => ..., 'title' => ...), or
 *                     false when the page could not be fetched.
 */
public static function get_url_lists($url,&$config) {
    $html = self::get_html($url,$config);
    if (!$html) {
        return false;
    }
    $data = array();

    if ($config['sourcetype'] == 4) {
        $xml = h5_base::load_sys_class('xml');
        $html = $xml->xml_unserialize($html);
        if (h5_base::load_config('system','charset') == 'gbk') {
            $html = array_iconv($html,'utf-8','gbk');
        }
        if (isset($html['rss']['channel']['item']) && is_array($html['rss']['channel']['item'])) {
            foreach ($html['rss']['channel']['item'] as $k=>$v) {
                $data[$k]['url'] = $v['link'];
                $data[$k]['title'] = $v['title'];
            }
        }
        return $data;
    }

    $html = self::cut_html($html,$config['url_start'],$config['url_end']);
    $html = str_replace(array("\r","\n"),'',$html);
    // One anchor per line, so "." (which does not cross newlines) stays inside one link.
    $html = str_replace(array("</a>","</A>"),"</a>\n",$html);
    // "\b" keeps tags such as <abbr> or <article> from being taken for anchors, and the
    // link text may start with any character.
    preg_match_all('/<a\b([^>]*)>(.+?)<\/a>/i',$html,$out);

    // De-duplicate on the attribute string only; array_unique() keeps the original keys,
    // so $out[2][$k] is still the text belonging to the same anchor.
    foreach (array_unique($out[1]) as $k=>$attributes) {
        if (!preg_match('/href=[\'"]?([^\'" ]*)[\'"]?/i',$attributes,$match_out)) {
            continue;
        }
        $href = $match_out[1];
        if (!empty($config['url_contain']) && strpos($href,$config['url_contain']) === false) {
            continue;
        }
        if (!empty($config['url_except']) && strpos($href,$config['url_except']) !== false) {
            continue;
        }
        $data[$k]['url'] = self::url_check($href,$url,$config);
        $data[$k]['title'] = strip_tags($out[2][$k]);
    }
    return $data;
}
/**
 * Download a page and convert it to the system charset when necessary.
 *
 * Conversion is skipped for sourcetype 4, when no source charset is configured,
 * and when the source charset already equals CHARSET (compared case-insensitively).
 *
 * @param string $url    Address to read with file_get_contents().
 * @param array  $config Node configuration; reads sourcecharset and sourcetype.
 * @return string|false The page body, or false when $url is empty or nothing was read.
 *                      The result of iconv() is returned as is, so a failed conversion
 *                      also yields false.
 */
protected static function get_html($url,&$config) {
    if (empty($url)) {
        return false;
    }
    $html = @file_get_contents($url);
    if (!$html) {
        return false;
    }
    $source_charset = isset($config['sourcecharset']) ? (string) $config['sourcecharset'] : '';
    $is_feed = isset($config['sourcetype']) && $config['sourcetype'] == 4;
    // Compare against the CHARSET constant that the conversion itself targets.
    if (!$is_feed && $source_charset !== '' && strcasecmp($source_charset,CHARSET) !== 0) {
        $html = iconv($source_charset,CHARSET.'//IGNORE',$html);
    }
    return $html;
}
/**
 * Return the text found between a start marker and an end marker.
 *
 * Line breaks are removed from the HTML and from both markers, and the markers are
 * trimmed, before matching. The text is taken after the first occurrence of $start,
 * up to the next occurrence of $start or $end, whichever comes first; when $end does
 * not occur, everything up to the next $start (or the end of the HTML) is returned.
 *
 * @param string $html  Text to search.
 * @param string $start Marker preceding the wanted text.
 * @param string $end   Marker following the wanted text.
 * @return string|false The text in between; '' when $start does not occur; false when
 *                      $html is empty or either marker is empty.
 */
protected static function cut_html($html,$start,$end) {
    if (empty($html)) return false;
    $line_breaks = array("\r","\n");
    $html = str_replace($line_breaks,'',$html);
    $start = trim(str_replace($line_breaks,'',(string) $start));
    $end = trim(str_replace($line_breaks,'',(string) $end));
    // explode() rejects an empty separator (a ValueError on PHP 8), so there is
    // nothing to cut without both markers.
    if ($start === '' || $end === '') return false;
    $after_start = explode($start,$html);
    if (!isset($after_start[1])) return ''; // start marker not present
    $before_end = explode($end,$after_start[1]);
    return $before_end[0];
}
/**
 * Apply a list of regular-expression clean-up rules to a piece of HTML.
 *
 * $config holds one rule per line in the form "pattern[|]replacement"; a line without
 * "[|]" removes whatever the pattern matches. Patterns are wrapped in "/.../i" and every
 * "/" in them is escaped.
 *
 * NOTE(review): a pattern that already contains "\/" is escaped a second time and becomes
 * invalid; that rule then makes the whole replacement fail.
 *
 * @param string $html   Text to clean.
 * @param string $config Rule lines, separated by LF or CRLF.
 * @return string $html unchanged when there are no usable rules or when the replacement
 *                fails (for example because of an invalid pattern); otherwise the result
 *                of preg_replace().
 */
protected static function replace_item($html,$config) {
    if (empty($config)) return $html;
    $patterns = $replace = array();
    foreach (explode("\n",$config) as $line) {
        // Rules typed into a form arrive with CRLF endings; a stray "\r" inside the
        // pattern could never match because cut_html() strips line breaks from the HTML.
        $line = rtrim($line,"\r");
        if (empty($line)) continue;
        $parts = explode('[|]',$line);
        $patterns[] = '/'.str_replace('/','\/',$parts[0]).'/i';
        $replace[] = isset($parts[1]) ? $parts[1] : '';
    }
    // Only blank lines: nothing to do, hand the text back untouched.
    if (empty($patterns)) return $html;
    $result = @preg_replace($patterns,$replace,$html);
    // preg_replace() returns null on error; keep the text rather than lose it.
    return $result === null ? $html : $result;
}
/**
 * Split a rule string at the placeholder returned by L('[content]').
 *
 * Each resulting piece is trimmed and has its carriage returns and line feeds removed.
 *
 * @param string $html Rule text to split.
 * @return array The pieces, in order of appearance.
 */
protected static function replace_sg($html) {
    $pieces = explode(L('[content]'),$html);
    if (!is_array($pieces)) {
        return $pieces;
    }
    $line_breaks = array("\r","\n");
    foreach (array_keys($pieces) as $index) {
        $trimmed = trim($pieces[$index]);
        $pieces[$index] = str_replace($line_breaks,'',$trimmed);
    }
    return $pieces;
}
protected static function url_check($url,$baseurl,$config) {
$urlinfo = parse_url($baseurl);
$baseurl = $urlinfo['scheme'].'://'.$urlinfo['host'].(substr($urlinfo['path'],-1,1) === '/'?substr($urlinfo['path'],0,-1) : str_replace('\\','/',dirname($urlinfo['path']))).'/';
if (strpos($url,'://') === false) {
if ($url[0] == '/') {
$url = $urlinfo['scheme'].'://'.$urlinfo['host'].$url;
}else {
if ($config['page_base']) {
$url = $config['page_base'].$url;
}else {
$url = $baseurl.$url;
}
}
}
return $url;

?>